notebook.community

Edit and run



In [2]:

    
import psycopg2
import csv, sys
from collections import defaultdict
import string, re, operator, json
from datetime import datetime



In [3]:

    
conn = psycopg2.connect("host=localhost dbname=cooccurrence user=vagrant password=vagrant")



In [3]:

    
# cur = conn.cursor()
# cur.execute("SELECT * FROM documents_document LIMIT 10")
# cur.fetchone()



In [4]:

    
# Load articles to work with 
# NOTE: CHANGE PATH TO work/ forlder under utils
path_to_store = "work/"
# Read web articles
# UNOTE: PLOADED TO DRIVE
f = open('/vagrant/julia/VerbalPhraseDetection/popularity/data/web1.csv', 'r')
web = {}
for el in f.readlines():
    splitted = el.split('_____@@@@@_____')
    if len(splitted) > 4:
        web[splitted[0]] = splitted[1:]



In [128]:

    
# # Read tweets articles
# f = open('/vagrant/julia/VerbalPhraseDetection/popularity/data/tweets.csv', 'r')
# tweets = {}
# i = 0
# for el in f:
#     if i > 100:
#         break
#     splitted = el.split('_____@@@@@_____')
#     if len(splitted) > 4:
#         tweets[splitted[0]] = splitted[1:-1]
#     i += 1



In [129]:

    
# datetime.strptime(web.items()[0][1][1].split('.')[0], "%Y-%m-%d %H:%M:%S").isoformat(" ")



In [5]:

    
# Get concepts/literals to scan the docs
# NOTE: PATH TO THE LEXICON UPLOADED TO DRIVE
concept_literals = []
concept_literals_map = defaultdict(list)
f = open('/vagrant/julia/VerbalPhraseDetection/popularity/data/theme_climate_5k_plus_clean_non-clean.json', 'r')
j = json.loads(f.read())
for el in j:
    try:
        concept_literals.append((el['main'].decode('utf-8').encode('ascii','ignore'), el['id'], 1.))
        concept_literals_map[el['id']].append(el['main'].decode('utf-8').encode('ascii','ignore'))
    except Exception, e:
        continue
    for lit in el['literals']:
        try:
            concept_literals.append((lit.decode('utf-8').encode('ascii','ignore'), el['id'], 1.))
            concept_literals_map[el['id']].append(lit.decode('utf-8').encode('ascii','ignore'))
        except Exception, e:
            continue

Insert into the DB



In [27]:

    
# NOTE: PATH TO DATALIB
sys.path.append('/vagrant/julia/datalib/')
import datalib

# Initialize the tokenizer with the literals
datalib.tokenizers.AbbrAwareStemmer(datalib.tokenizers.SnowballStemmer('english'))
token = datalib.VocabularyTokenizer
tokenizer = token.build(
    concept_literals, [],                                                                          
    datalib.tokenizers.AbbrAwareStemmer(datalib.tokenizers.SnowballStemmer('english'))
)



In [29]:

    
def add_cooccurrences(concepts, doc_id, cur):
    for con1 in concepts[:-1]:
        for con2 in concepts[1:]:
            if con1[3] < con2[3]:
                if concept_literals_map[con1[1]][0] < concept_literals_map[con2[1]][0]:
                    cur.execute('INSERT INTO cooccurrences_sentence VALUES (DEFAULT, %s, %s, NULL);',
                        (con1[0], con2[0]))
                else:
                    cur.execute('INSERT INTO cooccurrences_sentence VALUES (DEFAULT, %s, %s, NULL);',
                        (con2[0], con1[0]))



In [26]:

    
cur.close()
conn.rollback()
for k, v in web.items():
    cur = conn.cursor()
    doc_id = k
    identifier = v[0]
    publish_date = datetime.strptime(v[1].split('.')[0].split('+')[0], "%Y-%m-%d %H:%M:%S")
    title = v[2]
    text = v[3]
    cur.execute("INSERT INTO documents_document(id, published_date, title, extracted_text, identifier) VALUES(%s, %s, %s, %s, %s);", 
                (doc_id, publish_date, title, text, identifier))
    conn.commit()
    cur.close()
print "Done!"

conn.rollback()
for k, v in concept_literals_map.items()[10:]:
    cur = conn.cursor()
    concept_id = k
    main_name = v[0]
    cur.execute('INSERT INTO concepts_concept(id, name) VALUES (%s, %s)',
                (concept_id, main_name))
    cur.execute('INSERT INTO concepts_literal VALUES (DEFAULT, NULL, %s, %s, %s);',
                (main_name, concept_id, True))
    for s in v[1:]:
        cur.execute('INSERT INTO concepts_literal VALUES (DEFAULT, NULL, %s, %s, %s);',
                    (s, concept_id, False))
    conn.commit()
    cur.close()
print "Done!"

conn.rollback()
# occurrence_info = defaultdict(list)
for doc_id, doc in web.items():
    cur = conn.cursor()
    text = doc[2].decode('utf-8').encode('ascii','ignore') + '\n' + doc[3].decode('utf-8').encode('ascii','ignore')
    data = tokenizer.analyze((text))
    for di in data:
        for key, value in di.items():
            cur.execute('INSERT INTO concepts_occurrence VALUES (DEFAULT, %s, %s, %s, %s);',
                (key.id, doc_id, value.positions[0][0], value.positions[-1][0] - value.positions[0][0] + value.positions[-1][1]))
    conn.commit()
    cur.close()









    



Done!



In [30]:

    
conn.rollback()
# occurrence_info = defaultdict(list)
# j = 10000
for doc_id, doc in web.items():
#     if j % 1000 == 0:
#         print j
    text = doc[2].decode('utf-8').encode('ascii','ignore') + '\n' + doc[3].decode('utf-8').encode('ascii','ignore')
    cur = conn.cursor()
    cur.execute('SELECT * FROM concepts_occurrence WHERE document_id=%s;', (doc_id,) )
    occurrence_info = cur.fetchall()
    occurrence_info = sorted(occurrence_info, key=lambda x:x[3])
    current_concept = 0
    for i, el in enumerate(occurrence_info[:-1]):
        # Add terms to cooccurrence if in the same sentence
        # If there is a dot between cur and next and there are more then 1 word
        # print "............."
        # print occurrence_info[doc[0]][current_concept][1][0][0], occurrence_info[doc[0]][i+1][1][0][0]
        # print text[occurrence_info[doc[0]][current_concept][1][0][0]:occurrence_info[doc[0]][i+1][1][0][0] + 20]
        # print re.findall("\S*[^\w\s]\S*", text[occurrence_info[doc[0]][current_concept][1][0][0]:occurrence_info[doc[0]][i+1][1][0][0]])
        if el[3] < occurrence_info[i+1][3] and \
            re.findall("\S*[^\w\s\-]\S*", text[occurrence_info[current_concept][3]:occurrence_info[i+1][3]]):
            if current_concept + 1 < i:
                add_cooccurrences(occurrence_info[current_concept:i], doc_id, cur)
            current_concept = i + 1
    conn.commit()
    cur.close()
#     j += 1









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-30-181516f87a04> in <module>()
     24     conn.commit()
     25     cur.close()
---> 26     j += 1

TypeError: 'int' object is not iterable

Get Verbal Phrases for Top



In [6]:

    
import sys, json
import getopt, codecs, time, pickle
from collections import Counter
from nltk.stem.snowball import SnowballStemmer
from outputVerbalPhrase import * 
from concept_occurrence import *
from get_verbal_phrase import *
from get_parse_tree import parse_fileTextBlob
sys.path.append(".")
sys.path.append("/opt/texpp")



In [7]:

    
def find_punkt(text, con_pos, where):
    p = re.compile("[^\w\s\-]")
    if where == 'left':
        position = 0
        for m in p.finditer(text[0: con_pos]):
            position = m.start()
    else:
        position = con_pos
        for m in p.finditer(text[con_pos:]):
            position = m.start()
            break
    return position

def find_sentence(text, con1_pos, con2_pos):
    if con1_pos < con2_pos:
        return find_punkt(text, con1_pos, 'left')+1,con2_pos + find_punkt(text, con2_pos, 'right')
    else:
        return find_punkt(text, con2_pos, 'left')+1,con1_pos + find_punkt(text, con1_pos, 'right')



In [8]:

    
# NOTE: PATH TO PARSE TREES
parse_trees_path = '/vagrant/julia/VerbalPhraseDetection/tweetstorm/data/parse_trees/'
# NOTE: PATH TO STANFORD PARSER
parser_path = '/vagrant/stanford-parser-2012-11-12/lexparser.sh'
labels_map = json.loads(open('/vagrant/julia/VerbalPhraseDetection/tweetstorm/data/concepts_with_synonyms.json', 'r').read())
smart = 2
# Get most coocccurred - 1st for loop
cur = conn.cursor()
cur.execute('SELECT * FROM cooccurrences_sentence LIMIT 1;')
cooccurrence_info = cur.fetchone()
conn.commit()
cur.close()
print cooccurrence_info
# For docs for the most cooccurred - 2nd for loop
    # Get cooccurrence for documents
parse_tree = ""









    



(21309, 3462931, 3462933, None)



In [9]:

    
# Get concept id of the most cooccurred
cur = conn.cursor()
cur.execute('select c1.concept_id, c2.concept_id, count(*) from cooccurrences_sentence \
join concepts_occurrence as c1 on cooccurrences_sentence.concept_occurrence_id_1 = c1.id \
join concepts_occurrence as c2 on cooccurrences_sentence.concept_occurrence_id_2 = c2.id \
group by c1.concept_id, c2.concept_id \
order by count(*) DESC \
limit 30;')
cons = cur.fetchall()
conn.commit()
cur.close()



In [42]:

    
# # Get cooccurrences for the inserted document with id=1
# cur = conn.cursor()
# cur.execute('select c1.concept_id, c2.concept_id, count(*) \
# from cooccurrences_sentence join concepts_occurrence as c1 \
# on cooccurrences_sentence.concept_occurrence_id_1 = c1.id \
# join concepts_occurrence as c2 \
# on cooccurrences_sentence.concept_occurrence_id_2 = c2.id \
# where c1.document_id=1 \
# group by c1.concept_id, c2.concept_id \
# order by count(*) DESC \
# limit 30;')
# cons = cur.fetchall()
# conn.commit()
# cur.close()



In [125]:

    
import nltk
from nltk.corpus import wordnet as wn
def fill_union_find_for_synset(verb, unique_verbs, union_find, debug=0):
    # check other if it is in synset of mine
    for k, v in unique_verbs.items():
        if k != verb:
            if k in unique_verbs[verb]['synset']:
                if debug:
                    print "Verb - %s - is in synset of %s" % (k, verb)
                union_find[v['counter']] = unique_verbs[verb]['counter']
    # check if this verbs in synset of other
    for k, v in unique_verbs.items():
        if verb != k:
            if verb in v['synset']:
                union_find[unique_verbs[verb]['counter']] = v['counter']
                if debug:
                    print "Verb - %s - is in synset of %s" % (verb, k)
                return [(kk, vv) for kk, vv in unique_verbs.items() if vv['counter']==v['counter']][0]
    return verb, unique_verbs[verb]

# Main function to fold input triplets
def get_folded_statistics(cleaned_triplets, debug=0):
    count_unique = 0
    unique_verbs = defaultdict(dict)
    union_find_for_cleaned = [i for i in xrange(len(cleaned_triplets))]
    statistics_cleaned = {'->': defaultdict(int), '<-': defaultdict(int)}
    for tr in cleaned_triplets:
        res = ""
        if debug:
            print "Initial:", tr.final_verbal_phrase
        if 'to ' in tr.final_verbal_phrase:
            verb_to_add = tr.final_verbal_phrase.split('to ')[1]
        else:
            verb_to_add = tr.final_verbal_phrase
        if debug:
            print "Deleted to: ", verb_to_add
        if ' and ' in verb_to_add:
            verb_to_add = verb_to_add.split(' and ')[0]
        if debug:
            print "Deleted and: ", verb_to_add
        words = nltk.pos_tag(verb_to_add.split(' '))
        if debug:
            print "POS: ", words
        while len(words) > 0 and \
            not words[0][1].startswith('N') and \
            not words[0][1].startswith('V') and \
            not words[0][1].startswith('J'): #  ('IN' and 'RB'):
            if debug:
                print "Tag for the word: ", words[0][1], words[0][1].startswith('N'), words[0][1].startswith('V')
            if words[0][1] == 'RB':
                res = words[0][0] + ' '
            words.pop(0)
        if debug:
            print "Verbal phases after deleting start with bad POS: ", words
        if words:
            res += words[0][0]
            # Synset folding
            if words[0][0] not in unique_verbs:
                unique_verbs[words[0][0]]['counter'] = count_unique
                count_unique += 1
                unique_verbs[words[0][0]]['synset'] = \
                    dict(sorted(Counter([re.sub("_", " ", x.name()) 
                         for bla in [x.lemmas() 
                                     for x in wn.synsets(words[0][0], pos=wn.VERB)] 
                         for x in bla]).items(), key=lambda x:x[1], reverse=True)[0:5]).keys()
                if debug:
                    print 'Synsets and counter for the verb: ', words[0][0], unique_verbs[words[0][0]]
                parent_verb = fill_union_find_for_synset(words[0][0], unique_verbs, union_find_for_cleaned, debug)
                if debug:
                    print "Parent verb - %s, for verb - %s" % (parent_verb[0], words[0][0])
            if debug:
                print "First word: ", res
            if len(words) == 1 and words[0][1] == 'DT':
                continue
            if len(words) > 1 and words[1][1] == 'IN':
                res += ' ' + words[1][0]
                if debug:
                    print "Final phrase: ", res
            elif len(words) > 2 and words[2][1] == 'IN':
                res += ' ' + words[1][0] + ' ' + words[2][0]
                if debug:
                    print "Final phrase: ", res
            if tr.subject == 0:
                statistics_cleaned['->'][res] += 1
                if not 'representations->' in unique_verbs[words[0][0]]:
                    unique_verbs[words[0][0]]['representations->']  = [res]
                else:
                    unique_verbs[words[0][0]]['representations->'].append(res)
            else:
                statistics_cleaned['<-'][res] += 1
                if not 'representations<-' in unique_verbs[words[0][0]]:
                    unique_verbs[words[0][0]]['representations<-']  = [res]
                else:
                    unique_verbs[words[0][0]]['representations<-'].append(res)
#     print "[CLEANED]"
    print "->"
    res_right = []
    for kk, vv in sorted([(k, v) for k, v in statistics_cleaned['->'].items()], key=lambda x:x[1], reverse=True)[0:5]:
        if kk.split()[0] not in res_right:
            try:
                print kk, len(unique_verbs[kk.split()[0]]['representations->'])
            except:
                continue
            res_right.append(kk.split()[0])
    print "<-"
    res_left = []
    for kk, vv in sorted([(k, v) for k, v in statistics_cleaned['<-'].items()], key=lambda x:x[1], reverse=True)[0:5]:
        if kk.split()[0] not in res_left:
            try:
                print kk, len(unique_verbs[kk.split()[0]]['representations<-'])
            except:
                continue
            res_left.append(kk.split()[0])
    return sorted([(k, v) for k, v in statistics_cleaned['->'].items()], key=lambda x:x[1], reverse=True), sorted([(k, v) for k, v in statistics_cleaned['<-'].items()], key=lambda x:x[1], reverse=True), unique_verbs, union_find_for_cleaned

Main routine to get the verbal phrases



In [127]:

    
result_to_rank = []
import nltk
file_type = 'news'
num_pages = (0,500)
# for top cooccurred concepts
for con in cons:
    cur = conn.cursor()
    cur.execute('select c1.id, c1.concept_id, c1.position, c1.length, c2.id, c2.concept_id, c2.position, c2.length, c1.document_id from cooccurrences_sentence \
    join concepts_occurrence as c1 on cooccurrences_sentence.concept_occurrence_id_1 = c1.id \
    join concepts_occurrence as c2 on cooccurrences_sentence.concept_occurrence_id_2 = c2.id \
    where c1.concept_id = %s and c2.concept_id = %s;', (con[0], con[1],) )
    occurrences = cur.fetchall()
    conn.commit()
    cur.close()
    id_parse_trees = {}
    count_of_being_one_concept = 0
    output_name_pickle = 'work/ipython_issue_discovery/%s-statistics-%s-%s-%d-%d.pickle' % (file_type, con[0], con[1], num_pages[0], num_pages[1])
    cname1 = ""
    cname2 = ""
    if os.path.exists(output_name_pickle): # and file_type == 'twitter':
        triplets = pickle.loads( open(output_name_pickle, 'r').read() )
    else:
        for occs in occurrences[0:500]:
            cur = conn.cursor()
            cur.execute('SELECT * FROM concepts_occurrence WHERE id=%s;', (occs[0],) )
            occurrence1 = cur.fetchone()
            cur.execute('SELECT * FROM concepts_concept WHERE id=%s;', (occs[1],) )
            con1 = cur.fetchone()
            cur.execute('SELECT * FROM concepts_occurrence WHERE id=%s;', (occs[4],) )
            occurrence2 = cur.fetchone()
            if occurrence2[3] > occurrence1[3] and 0 < occurrence2[3] - occurrence1[3] - occurrence1[4] < 2:
                count_of_being_one_concept += 1
                continue
            if occurrence2[3] < occurrence1[3] and 0 < occurrence1[3] - occurrence2[3] - occurrence2[4] < 2:
                count_of_being_one_concept += 1
                continue
            cur.execute('SELECT * FROM concepts_concept WHERE id=%s;', (occs[5],) )
            con2 = cur.fetchone()
            cur.execute('SELECT * FROM documents_document WHERE id=%s;', (occs[8],) )
            document = cur.fetchone()
            doc_id = document[0]
            doc_text = document[2].decode('utf-8').encode('ascii','ignore') + '\n' + document[6].decode('utf-8').encode('ascii','ignore')
            conn.commit()
            cur.close()

            start, end = find_sentence(doc_text, occurrence1[3], occurrence2[3]+occurrence2[4])
            text = doc_text[start:end]
#             print "TEXT:", text

            id_to_store = str(doc_id) + '_' + str(start) + '_' + str(end)
            # PREPARE PARSE TREES: utils/get_parse_tree.py
            parse_tree_info = parse_fileTextBlob(parse_trees_path + id_to_store, text, parser_path, str(id_to_store), smart, {}, None)
            id_parse_trees[parse_tree_info[0]] = parse_tree_info[1]

        # COLLECT TRIPLETS
        triplets = parse_triplets(id_parse_trees=id_parse_trees, 
                              labels_map=labels_map, 
                              concepts_to_find=[con1[2],con2[2]], 
                              parser_path=parser_path, debug=0)
#         open(output_name_pickle, 'w'). write( pickle.dumps(triplets) )
    cur = conn.cursor()
    cur.execute('SELECT * FROM concepts_concept WHERE id=%s;', (occurrences[0][1],) )
    con1 = cur.fetchone()
    cname1 = con1[2]
    cur.execute('SELECT * FROM concepts_concept WHERE id=%s;', (occurrences[0][5],) )
    con2 = cur.fetchone()
    cname2 = con2[2]
    print '\n', cname1, 'VS', cname2
    print count_of_being_one_concept / 500.0
    # INITIAL TRIPLET CLEANING : from utils/outputVerbalPhrase.py
    cleaned_triplets = clean_triplets([tr[0] for tr in triplets])
    statistics = get_statistics([tr[0] for tr in triplets])
    # GET FOLDED STATISTICS
    f, b, nn, mm = get_folded_statistics(cleaned_triplets)
    r_t_r = (' '.join([cname1, 'VS', cname2]), 
             count_of_being_one_concept / 500.0, 
             f, b)
    result_to_rank.append(r_t_r)









    



Carbon VS Pollution
0.0
->
curb by 1
make 1
taxchanges 1
<-
best measure by 2

US Environmental Protection Agency VS regulation
0.0
->
issue 9
propose 11
determine that 3
<-
feel about 1
come from 1
ever issue by impose by 1

Coal VS Power Plant
0.0
->
fire 11
burn 2
describe 2
cool 2
be 1
<-
use than 4
begin bring in 1
burn 1
switch from 1
produce 1

Oil VS natural gas
0.0
->
produce 2
typically combine with replace as 1
typically situate in rise while 1
<-
replace in 3
limit 2
decline 1
Is 1

Coal VS natural gas
0.0
->
use 3
extraction from 2
transport than 2
plantsPulverized with 1
fill in 1
<-
displace 7
idle here in 3
surpass as 3
decrease 2
take over 3

Drilling VS Oil
0.0
->
extract from 5
drill in 3
collect 2
<-
announce that 4
leave 1
navigate through 1
typically require begin as 1

Fossil VS Fuel
0.0
->
<-

Pollution VS Water
0.0
->
find in 2
Were pump endanger 1
get into 1
cloud 1
<-
detect 3
address by 2
deplete of 1
set for 1
necessarily mean from 
Climate change VS War
0.0
->
indirectly increase by directly cause grow 3
were 2
exacerbate in 1
<-
emerge as 1
interlink with 1

Carbon VS US Environmental Protection Agency
0.0
->
remember that 1
axe in 1
oppose 1
offer 1
<-
propose on 34
limit from 17
cut from 13
issue 8

Drilling VS shale
0.0
->
head on 63
generally require underground under 2
perfect in 2
penetrate pumped down 2
<-
bring on 4
pay 1

Climate change VS effects of climate
0.0
->
need from 1
<-

Carbon VS Prices
0.0
->
represent 2
dont 2
store by 1
make sure that 1
destroy that 1
<-
represent 2
set for 2
secure from 2
reflect 2
signalThats 2

Flood VS Water
0.0
->
share point out 2
By 2
result in 1
ineffective at 1
calculate base on 1
<-
tackle in 1
face from 1
percolate well for 1
mean that 1

Climate change VS Risk
0.0
->
increase 21
pose 11
put at 5
endanger by 3
result in 2
<-
adapt 3
low if 1

Coal VS Pollution
0.0
->
gasify 2
burn 2
completely eliminate stateand show that 2
emit 2
<-
associate with 3
result from 1

Pollution VS US Environmental Protection Agency
0.0
->
ensure that 1
announce by 1
offer 1
<-
release in 22
limit from 18
propose 18
issue 14
cut from 10

Drilling VS fracking
0.0
->
generally require utilize 2
effectively banned by access 1
contaminate 1
<-
densely populated dangerous 2
run 1
enable in 1
get with 1

Pollution VS fracking
0.0
->
raise with 2
pathways from 2
associate with 1
blame on 1
drill 1
<-
cause 6
fail amid about 3
determine whether 2
link 2
actually produce between 
Climate change VS mankind
0.0
->
cause on 23
particularly fuelsincreased already affect on endanger by 3
<-
not is contribute 3
interface with 3
solve 3
avoid 2

Coal VS Emissions
0.0
->
describe 2
recently issue become 2
escape because 2
even combine with <-
certainly fall as involve in 2
make unlikely that 2
prevent 2
contain than 1

Drilling VS natural gas
0.0
->
extract from 5
open in 2
associate with 1
begin as 1
create for 1
<-
boom in 5
free from 2
help 1
reach 1

Governance VS regulation
0.0
->
force 97
drive about 6
introduce 6
govern 5
impose on 5
<-
introduce by 1
guide on 1
give out 1
propose by 1
use 1

Coal VS Oil
0.0
->
surpass as 6
increase 2
exportsParallel 2
equal as 1
even surpass as <-
pass on 2
announce abandon for 2
transport 3
open up 1

Climate VS Climate change
0.0
->
anomalies as 6
expose alike 5
add about 2
spend in 2
reduce by 2
<-
not distinguished from be 1
Check for 1
publish by 1
real as 1

Fuel VS natural gas
0.0
->
use 2
range from 1
Asias 1
deter 1
combine from 1
<-
acceptable 6
bridge 5
become for 6
replace 5
play as 3

Oil VS Prices
0.0
->
develop in 2
probably wont though decline in 2
create anchor on 1
make sure that 1
<-
drive away from 11
suddenly make tie 4
displace 2
surge in 2

Climate VS warming
0.0
->
anomalies as 6
hold 4
be indiscernible from 2
mitigate 1
over-project 1
<-

Coal VS US Environmental Protection Agency
0.0
->
see on 2
control 1
build with 2
significantly issued during head 1
<-
base on 4
control 3
regulate 4
make 3
rule 3

Adaptation VS Climate change
0.0
->
base on 3
also lead Is 1
offset 1
inform by 1
<-
drive within 2
release in 2
help 1
address through 1
identify 1



In [96]:

    
sorted(result_to_rank, key=lambda x:x[1])[:-3]









    Out[96]:





[]



In [52]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [17]:

    
n_text = """The environmental movement has advanced three arguments in recent years for giving up fossil fuels: (1) that we will soon run out of them anyway; (2) that alternative sources of energy will price them out of the marketplace; and (3) that we cannot afford the climate consequences of burning them.

These days, not one of the three arguments is looking very healthy. In fact, a more realistic assessment of our energy and environmental situation suggests that, for decades to come, we will continue to rely overwhelmingly on the fossil fuels that have contributed so dramatically to the world’s prosperity and progress.

In 2013, about 87% of the energy that the world consumed came from fossil fuels, a figure that—remarkably—was unchanged from 10 years before. This roughly divides into three categories of fuel and three categories of use: oil used mainly for transport, gas used mainly for heating, and coal used mainly for electricity.

Over this period, the overall volume of fossil-fuel consumption has increased dramatically, but with an encouraging environmental trend: a diminishing amount of carbon-dioxide emissions per unit of energy produced. The biggest contribution to decarbonizing the energy system has been the switch from high-carbon coal to lower-carbon gas in electricity generation.

On a global level, renewable energy sources such as wind and solar have contributed hardly at all to the drop in carbon emissions, and their modest growth has merely made up for a decline in the fortunes of zero-carbon nuclear energy. (The reader should know that I have an indirect interest in coal through the ownership of land in Northern England on which it is mined, but I nonetheless applaud the displacement of coal by gas in recent years.)

The argument that fossil fuels will soon run out is dead, at least for a while. The collapse of the price of oil over the past six months is the result of abundance: an inevitable consequence of the high oil prices of recent years, which stimulated innovation in hydraulic fracturing, horizontal drilling, seismology and information technology. The U.S.—the country with the oldest and most developed hydrocarbon fields—has found itself once again, surprisingly, at the top of the energy-producing league, rivaling Saudi Arabia in oil and Russia in gas.

The shale genie is now out of the bottle. Even if the current low price drives out some high-cost oil producers—in the North Sea, Canada, Russia, Iran and offshore, as well as in America—shale drillers can step back in whenever the price rebounds. As Mark Hill of Allegro Development Corporation argued last week, the frackers are currently experiencing their own version of Moore’s law: a rapid fall in the cost and time it takes to drill a well, along with a rapid rise in the volume of hydrocarbons they are able to extract.

MORE SATURDAY ESSAYS

The Coming Chinese Crackup
Dave Barry: The Greatest (Party) Generation
Putin the Improviser
Our Amazingly Plastic Brains
The Return of Anti-Semitism
Immigration and Islam: Europe’s Crisis of Faith
And the shale revolution has yet to go global. When it does, oil and gas in tight rock formations will give the world ample supplies of hydrocarbons for decades, if not centuries. Lurking in the wings for later technological breakthroughs is methane hydrate, a seafloor source of gas that exceeds in quantity all the world’s coal, oil and gas put together.

So those who predict the imminent exhaustion of fossil fuels are merely repeating the mistakes of the U.S. presidential commission that opined in 1922 that “already the output of gas has begun to wane. Production of oil cannot long maintain its present rate.” Or President Jimmy Carter when he announced on television in 1977 that “we could use up all the proven reserves of oil in the entire world by the end of the next decade.”

That fossil fuels are finite is a red herring. The Atlantic Ocean is finite, but that does not mean that you risk bumping into France if you row out of a harbor in Maine. The buffalo of the American West were infinite, in the sense that they could breed, yet they came close to extinction. It is an ironic truth that no nonrenewable resource has ever run dry, while renewable resources—whales, cod, forests, passenger pigeons—have frequently done so.

The second argument for giving up fossil fuels is that new rivals will shortly price them out of the market. But it is not happening. The great hope has long been nuclear energy, but even if there is a rush to build new nuclear power stations over the next few years, most will simply replace old ones due to close. The world’s nuclear output is down from 6% of world energy consumption in 2003 to 4% today. It is forecast to inch back up to just 6.7% by 2035, according the Energy Information Administration.

Nuclear’s problem is cost. In meeting the safety concerns of environmentalists, politicians and regulators added requirements for extra concrete, steel and pipework, and even more for extra lawyers, paperwork and time. The effect was to make nuclear plants into huge and lengthy boondoggles with no competition or experimentation to drive down costs. Nuclear is now able to compete with fossil fuels only when it is subsidized.

ENLARGE
ILLUSTRATION: HARRY CAMPBELL
As for renewable energy, hydroelectric is the biggest and cheapest supplier, but it has the least capacity for expansion. Technologies that tap the energy of waves and tides remain unaffordable and impractical, and most experts think that this won’t change in a hurry. Geothermal is a minor player for now. And bioenergy—that is, wood, ethanol made from corn or sugar cane, or diesel made from palm oil—is proving an ecological disaster: It encourages deforestation and food-price hikes that cause devastation among the world’s poor, and per unit of energy produced, it creates even more carbon dioxide than coal.

Wind power, for all the public money spent on its expansion, has inched up to—wait for it—1% of world energy consumption in 2013. Solar, for all the hype, has not even managed that: If we round to the nearest whole number, it accounts for 0% of world energy consumption.

Both wind and solar are entirely reliant on subsidies for such economic viability as they have. World-wide, the subsidies given to renewable energy currently amount to roughly $10 per gigajoule: These sums are paid by consumers to producers, so they tend to go from the poor to the rich, often to landowners (I am a landowner and can testify that I receive and refuse many offers of risk-free wind and solar subsidies).

It is true that some countries subsidize the use of fossil fuels, but they do so at a much lower rate—the world average is about $1.20 per gigajoule—and these are mostly subsidies for consumers (not producers), so they tend to help the poor, for whom energy costs are a disproportionate share of spending.

The costs of renewable energy are coming down, especially in the case of solar. But even if solar panels were free, the power they produce would still struggle to compete with fossil fuel—except in some very sunny locations—because of all the capital equipment required to concentrate and deliver the energy. This is to say nothing of the great expanses of land on which solar facilities must be built and the cost of retaining sufficient conventional generator capacity to guarantee supply on a dark, cold, windless evening.

The two fundamental problems that renewables face are that they take up too much space and produce too little energy. Consider Solar Impulse, the solar-powered airplane now flying around the world. Despite its huge wingspan (similar to a 747), slow speed and frequent stops, the only cargo that it can carry is the pilots themselves. That is a good metaphor for the limitations of renewables.

To run the U.S. economy entirely on wind would require a wind farm the size of Texas, California and New Mexico combined—backed up by gas on windless days. To power it on wood would require a forest covering two-thirds of the U.S., heavily and continually harvested.

John Constable, who will head a new Energy Institute at the University of Buckingham in Britain, points out that the trickle of energy that human beings managed to extract from wind, water and wood before the Industrial Revolution placed a great limit on development and progress. The incessant toil of farm laborers generated so little surplus energy in the form of food for men and draft animals that the accumulation of capital, such as machinery, was painfully slow. Even as late as the 18th century, this energy-deprived economy was sufficient to enrich daily life for only a fraction of the population.

Our old enemy, the second law of thermodynamics, is the problem here. As a teenager’s bedroom generally illustrates, left to its own devices, everything in the world becomes less ordered, more chaotic, tending toward “entropy,” or thermodynamic equilibrium. To reverse this tendency and make something complex, ordered and functional requires work. It requires energy.

The more energy you have, the more intricate, powerful and complex you can make a system. Just as human bodies need energy to be ordered and functional, so do societies. In that sense, fossil fuels were a unique advance because they allowed human beings to create extraordinary patterns of order and complexity—machines and buildings—with which to improve their lives.

The result of this great boost in energy is what the economic historian and philosopher Deirdre McCloskey calls the Great Enrichment. In the case of the U.S., there has been a roughly 9,000% increase in the value of goods and services available to the average American since 1800, almost all of which are made with, made of, powered by or propelled by fossil fuels.

Still, more than a billion people on the planet have yet to get access to electricity and to experience the leap in living standards that abundant energy brings. This is not just an inconvenience for them: Indoor air pollution from wood fires kills four million people a year. The next time that somebody at a rally against fossil fuels lectures you about her concern for the fate of her grandchildren, show her a picture of an African child dying today from inhaling the dense muck of a smoky fire.

Notice, too, the ways in which fossil fuels have contributed to preserving the planet. As the American author and fossil-fuels advocate Alex Epstein points out in a bravely unfashionable book, “The Moral Case for Fossil Fuels,” the use of coal halted and then reversed the deforestation of Europe and North America. The turn to oil halted the slaughter of the world’s whales and seals for their blubber. Fertilizer manufactured with gas halved the amount of land needed to produce a given amount of food, thus feeding a growing population while sparing land for wild nature.

To throw away these immense economic, environmental and moral benefits, you would have to have a very good reason. The one most often invoked today is that we are wrecking the planet’s climate. But are we?

Although the world has certainly warmed since the 19th century, the rate of warming has been slow and erratic. There has been no increase in the frequency or severity of storms or droughts, no acceleration of sea-level rise. Arctic sea ice has decreased, but Antarctic sea ice has increased. At the same time, scientists are agreed that the extra carbon dioxide in the air has contributed to an improvement in crop yields and a roughly 14% increase in the amount of all types of green vegetation on the planet since 1980.

That carbon-dioxide emissions should cause warming is not a new idea. In 1938, the British scientist Guy Callender thought that he could already detect warming as a result of carbon-dioxide emissions. He reckoned, however, that this was “likely to prove beneficial to mankind” by shifting northward the climate where cultivation was possible.

Only in the 1970s and 1980s did scientists begin to say that the mild warming expected as a direct result of burning fossil fuels—roughly a degree Celsius per doubling of carbon-dioxide concentrations in the atmosphere—might be greatly amplified by water vapor and result in dangerous warming of two to four degrees a century or more. That “feedback” assumption of high “sensitivity” remains in virtually all of the mathematical models used to this day by the U.N. Intergovernmental Panel on Climate Change, or IPCC.

And yet it is increasingly possible that it is wrong. As Patrick Michaels of the libertarian Cato Institute has written, since 2000, 14 peer-reviewed papers, published by 42 authors, many of whom are key contributors to the reports of the IPCC, have concluded that climate sensitivity is low because net feedbacks are modest. They arrive at this conclusion based on observed temperature changes, ocean-heat uptake and the balance between warming and cooling emissions (mainly sulfate aerosols). On average, they find sensitivity to be 40% lower than the models on which the IPCC relies.

If these conclusions are right, they would explain the failure of the Earth’s surface to warm nearly as fast as predicted over the past 35 years, a time when—despite carbon-dioxide levels rising faster than expected—the warming rate has never reached even two-tenths of a degree per decade and has slowed down to virtually nothing in the past 15 to 20 years. This is one reason the latest IPCC report did not give a “best estimate” of sensitivity and why it lowered its estimate of near-term warming.

Most climate scientists remain reluctant to abandon the models and take the view that the current “hiatus” has merely delayed rapid warming. A turning point to dangerously rapid warming could be around the corner, even though it should have shown up by now. So it would be wise to do something to cut our emissions, so long as that something does not hurt the poor and those struggling to reach a modern standard of living.

We should encourage the switch from coal to gas in the generation of electricity, provide incentives for energy efficiency, get nuclear power back on track and keep developing solar power and electricity storage. We should also invest in research on ways to absorb carbon dioxide from the air, by fertilizing the ocean or fixing it through carbon capture and storage. Those measures all make sense. And there is every reason to promote open-ended research to find some unexpected new energy technology.

The one thing that will not work is the one thing that the environmental movement insists upon: subsidizing wealthy crony capitalists to build low-density, low-output, capital-intensive, land-hungry renewable energy schemes, while telling the poor to give up the dream of getting richer through fossil fuels."""



In [22]:

    
web = {}
web['1'] = ['web/www.wsj.com-1426282420',
  '2015-03-15 04:42:45.409603+00',
  'Fossil Fuels Will Save the World (Really)',
  n_text]



In [ ]: